library(Seurat)
library(RColorBrewer)
library(tidyverse)
library(MetBrewer)
library(cowplot)
library(patchwork)
library(DescTools)
library(scales)
library(ggrepel)
library(ggpubr)
library(ComplexHeatmap)
library(circlize)
library(msigdbr)
library(clusterProfiler)
library(enrichplot)
library(org.Rn.eg.db)
library(GOSemSim)
library(ggtree)
library(ggdendro)
library(viridis)
library(aplot)
library(ggtext)
source("util.R")
remote_dir <- "/Volumes/TOSHIBA/molpat/APECED_rats/analysis/single_cell/sc_rat_spleen/data/"
seurat_obj <- readRDS(paste0(remote_dir, "integrated_seurat_object.rds"))
5A
col_vec <- colorRampPalette(brewer.pal(9, "Set1"))(21)[1:18]
color_mapping <- data.frame(Cell.type = levels(seurat_obj$cell.type),
Color = col_vec)
umap_plot <- DimPlot(seurat_obj,
label = TRUE,
label.size = 5,
cols = col_vec,
group.by = "cell.type.idx") +
theme(legend.position = "none") +
labs(subtitle = NULL, title = NULL)
legend <- DimPlot(seurat_obj,
group.by = "legend",
cols = col_vec,
label = TRUE)
legend <- plot_grid(get_legend(legend))
umap_plot + legend

5B
ego_s <- read_tsv("data/Figure_5/GO_simplified.tsv")
## Rows: 38 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ego_s <- as_tibble(ego_s)
ego_s$GeneRatio <- sapply(ego_s$GeneRatio, function(x) {eval(parse(text=x))})
ego_s <- mutate(ego_s, log10p = -log10(p.adjust), DescriptionTrunc = StrTrunc(Description, maxlen = 50, ellipsis = "...", wbound = FALSE))
P_B <- ggplot(head(ego_s, 15),
aes(x = GeneRatio, y = reorder(DescriptionTrunc, GeneRatio),
color = `log10p`, size = Count)) +
geom_point() +
geom_point(aes(size=Count), shape = 21, colour="black", stroke=0.5) +
scale_size(range = c(0, 4)) +
scale_colour_gradient2(
low = ("white"),
mid = ("white"),
high = scales::muted("red"),
midpoint = -log10(0.05)
) +
ylab("") +
xlab("GeneRatio") +
theme_cowplot() +
coord_cartesian(xlim=c(0, 0.2)) +
scale_y_discrete(position = "right") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
P_B

5C
de.results <- read_tsv("data/Figure_5/benoist_DE_results.tsv")
## Rows: 42690 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Gene.name, Cell.type, description
## dbl (6): logFC, logCPM, F, PValue, FDR, DE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tonic_effect_processed <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de.results <- de.results %>%
left_join(tonic_effect_processed %>% dplyr::select(ortholog_name, Group) %>% distinct(), by = c("Gene.name" = "ortholog_name")) %>%
mutate(DE = if_else(FDR < .05 & logFC > 1, "Upregulated", "Not changed"),
DE = if_else(FDR < .05 & logFC < -1, "Downregulated", DE)) %>%
replace_na(list(Group = "Unknown"))
## Warning in left_join(., tonic_effect_processed %>% dplyr::select(ortholog_name, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 22 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
B_ORA <- fisher.test(as.numeric(de.results[de.results$Cell.type=="B",]$DE=="Downregulated"), as.numeric(de.results[de.results$Cell.type=="B",]$Group=="Tonic-sensitive"))
MF_ORA <- fisher.test(as.numeric(de.results[de.results$Cell.type=="MF",]$DE=="Downregulated"), as.numeric(de.results[de.results$Cell.type=="MF",]$Group=="Tonic-sensitive"))
p1 <- plot_tonic_ISGs(de.results[de.results$Cell.type=="B",]) + ggtitle("B cell") + annotate("text", x = -5.5, y = 7.2, hjust=0, col="black", label = paste0("OR=",round(B_ORA$estimate, 1),"\np=",format(B_ORA$p.value, digits = 2)))
p2 <- plot_tonic_ISGs(de.results[de.results$Cell.type=="MF",]) + ggtitle("Macrophage") + annotate("text", x = -5.5, y = 7.2, hjust=0, col="black", label = paste0("OR=",round(MF_ORA$estimate, 1),"\np=",format(MF_ORA$p.value, digits = 2)))
P_C <- p1 + p2
P_C
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

5D
tonic_effect_processed <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cell_type <- seurat_obj$cell.type
levels(cell_type) <- c("T4", "T4", "T8", "T8", "NK", "NK", "B", "B", "B", "B", "B", "B", "MF", "MF", "MF", "DC", "DC", "GN")
seurat_obj$cell.type.lvl1 <- cell_type
celltype_vec <- levels(seurat_obj$cell.type.lvl1)
seurat_obj$celltype.gntp <- paste(seurat_obj$cell.type.lvl1, seurat_obj$genotype, sep = "_")
seurat_obj$celltype.gntp <- ordered(seurat_obj$celltype.gntp, levels = paste(rep(celltype_vec, each = 2), rep(c("HE", "KO"), length(celltype_vec)), sep = "_"))
Idents(seurat_obj) <- "celltype.gntp"
DefaultAssay(seurat_obj) <- "RNA"
dge_df = data.frame()
for (i in unique(celltype_vec)) {
dge_df_sub <- FoldChange(seurat_obj, ident.1 = paste(i, "_KO", sep=""), ident.2 = paste(i, "_HE", sep=""), verbose = TRUE) %>%
rownames_to_column("gene") %>%
mutate(Cell.type = i)
dge_df <- rbind(dge_df, dge_df_sub)
}
mat <- dge_df %>%
dplyr::select(-c(pct.1, pct.2)) %>%
pivot_wider(names_from = "Cell.type", values_from = "avg_log2FC") %>%
column_to_rownames("gene")
B
data_to_plot <- tonic_effect_processed %>%
filter(Cell.type=="B") %>%
dplyr::select(ortholog_name, IFNAR_KO.FC, Group) %>%
drop_na(ortholog_name) %>%
left_join(dge_df %>% filter(Cell.type == "B") %>% dplyr::select(gene, avg_log2FC), by=c("ortholog_name"="gene")) %>%
mutate(avg_FC = 2^avg_log2FC, IFNAR_KO.FC = 1/IFNAR_KO.FC) %>%
drop_na(avg_log2FC)
col_vec <- c("#E41A1C",
"#d3d3d3",
"#377EB8")
names(col_vec) <- c("Tonic-sensitive", "NA", "Tonic-insensitive")
lab_df <- filter(data_to_plot, Group == "Tonic-sensitive")
cor_test <- cor.test(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, method = "spearman")
## Warning in cor.test.default(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, :
## Cannot compute exact p-value with ties
p_D1 <- ggplot(data_to_plot, aes(x=IFNAR_KO.FC, y=avg_FC, color=Group, label = ortholog_name)) +
geom_point(size=1) +
theme_cowplot(16) +
geom_hline(yintercept = 1, lty = 2) +
geom_vline(xintercept = 1, lty = 2) +
labs(x = "FC (Ifnar1 KO / Ctrl)", y = "FC (Aire KO / Ctrl)") +
geom_text_repel(data = lab_df,
aes(x = IFNAR_KO.FC, y = avg_FC, label = ortholog_name),
min.segment.length = 1, seed = 3,
box.padding = 1,
show.legend = FALSE,
max.overlaps =33,
force_pull=2,
force=100,
size=5
) +
annotate("text", x = 0.1, y = 5, hjust = 0,
label = paste0("rho=", round(cor_test$estimate, 2),
"\n", "p=", format(cor_test$p.value, digits=3))) +
scale_color_manual(values = col_vec) +
scale_x_log10() +
scale_y_log10() +
theme(legend.position = "none") +
ggtitle("B cell")
MF
data_to_plot <- tonic_effect_processed %>%
filter(Cell.type=="MF") %>%
dplyr::select(ortholog_name, IFNAR_KO.FC, Group) %>%
drop_na(ortholog_name) %>%
left_join(dge_df %>% filter(Cell.type == "MF") %>% dplyr::select(gene, avg_log2FC), by=c("ortholog_name"="gene")) %>%
mutate(avg_FC = 2^avg_log2FC, IFNAR_KO.FC = 1/IFNAR_KO.FC) %>%
drop_na(avg_log2FC)
col_vec <- c("#E41A1C",
"#d3d3d3",
"#377EB8")
names(col_vec) <- c("Tonic-sensitive", "NA", "Tonic-insensitive")
lab_df <- filter(data_to_plot, Group == "Tonic-sensitive")
cor_test <- cor.test(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, method = "spearman")
## Warning in cor.test.default(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, :
## Cannot compute exact p-value with ties
p_D2 <- ggplot(data_to_plot, aes(x=IFNAR_KO.FC, y=avg_FC, color=Group, label = ortholog_name)) +
geom_point(size=1) +
theme_cowplot(16) +
geom_hline(yintercept = 1, lty = 2) +
geom_vline(xintercept = 1, lty = 2) +
labs(x = "FC (Ifnar1 KO / Ctrl)", y = "FC (Aire KO / Ctrl)") +
geom_text_repel(data = lab_df,
aes(x = IFNAR_KO.FC, y = avg_FC, label = ortholog_name),
min.segment.length = 1, seed = 10,
box.padding = 1,
show.legend = FALSE,
max.overlaps =25,
force_pull=100,
force=100,
nudge_x=.2,
size=5
) +
annotate("text", x = 0.05, y = 10, hjust = 0,
label = paste0("rho=", round(cor_test$estimate, 2),
"\n", "p=", format(cor_test$p.value, digits=3))) +
scale_color_manual(values = col_vec) +
scale_x_log10() +
scale_y_log10() +
theme(legend.position = "none") +
ggtitle("Macrophage")
p_D <- p_D1 + p_D2
p_D
## Warning: ggrepel: 45 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

5E
tonic_effect <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de_data <- read_tsv("data/Figure_5/Tcells_DE_data.tsv")
## Rows: 296380 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, SYMBOL, Cell.type, Site, Group
## dbl (8): KO.log2EXP, HET.log2EXP, FC, P.VAL, FDR, DE, ISG, Log2FC
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
exprs_mtx <- read_tsv("data/Figure_5/exprs_mtx.txt")
## Rows: 17447 Columns: 71
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): SYMBOL
## dbl (70): A01_Het-P8-7_LN-Tconv, A02_Het-P11-9_LN-Tconv, A03_Het-P10-4_LN-Tc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
exprs_mtx$SYMBOL <- NULL
de_data <- de_data %>%
filter(Site != "Pooled", ISG == 1) %>%
dplyr::select(SYMBOL, Log2FC, Site, Cell.type) %>%
mutate(Group = paste(Site, Cell.type, sep = "_")) %>%
dplyr::select(-c(Site, Cell.type)) %>%
distinct(SYMBOL, Group, .keep_all = T) %>%
pivot_wider(names_from = Group, values_from = Log2FC) %>%
column_to_rownames("SYMBOL") %>%
drop_na() %>%
as.matrix()
col_names <- colnames(de_data)
colnames(de_data) <- str_split(col_names, "_", Inf, T)[,2]
tonic_effect_sub <- tonic_effect[match(rownames(de_data), tonic_effect$ortholog_name),] %>%
replace_na(list(Group = "No data"))
set.seed(42)
cl <- kmeans(de_data, centers = 3)$cluster
colAnn <- HeatmapAnnotation(Tonicity = tonic_effect_sub$Group,
which = 'col',
col = list(Tonicity = c("Tonic-sensitive" = "#E41A1C", "Tonic-insensitive" = "#377EB8", "No data"="white")),
simple_anno_size = unit(.5, "cm"),
gap = unit(1, 'cm'),
show_annotation_name = T)
P_E = Heatmap(t(de_data),
name = "log2FC",
#col = colorRamp2(seq(-3, 3, length.out=9), rev(brewer.pal(n=9, name="Spectral"))),
row_split = factor(str_split(col_names, "_", Inf, T)[,1]),
column_split = cl,
border=T,
show_row_dend = F,
show_column_dend = F,
show_column_names = F,
top_annotation = colAnn)
P_E

5F
de_data <- read_tsv("data/Figure_5/Tcells_DE_data.tsv")
## Rows: 296380 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, SYMBOL, Cell.type, Site, Group
## dbl (8): KO.log2EXP, HET.log2EXP, FC, P.VAL, FDR, DE, ISG, Log2FC
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
go.result_df <- read_tsv("data/Figure_5/Tcells_GO_full.tsv")
## Rows: 269 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Cell.type, ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
go.result.simpl_df <- read_tsv("data/Figure_5/Tcells_GO_simplified.tsv")
## Rows: 19 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Cell.type, ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de_data$SYMBOL <- str_split(de_data$SYMBOL, "; ", Inf, T)[,1]
list_names <- unique(de_data$Cell.type)
de_data_list <- group_split(de_data, Cell.type)
de_data_list <- as.list(de_data_list)
names(de_data_list) <- list_names
hsGO <- godata('org.Rn.eg.db', ont="BP")
## Warning in godata("org.Rn.eg.db", ont = "BP"): use 'annoDb' instead of 'OrgDb'
## preparing gene to GO mapping data...
## preparing IC data...
go.result.simpl_df$GeneRatioDecimal <- sapply(go.result.simpl_df$GeneRatio, function(x) {eval(parse(text=x))})
data_to_plot <- go.result.simpl_df %>%
group_by(Cell.type) %>%
slice_min(order_by = p.adjust, n = 25)
data_to_plot <- go.result.simpl_df[go.result.simpl_df$ID %in% go.result.simpl_df$ID,]
data_to_plot <- data_to_plot %>%
mutate(log10p = -log10(p.adjust),
DescriptionTrunc = StrTrunc(Description, maxlen = 60, ellipsis = "...", wbound = FALSE))
sim_mtx <- mgoSim(unique(data_to_plot$ID), unique(data_to_plot$ID), semData=hsGO, measure="Wang", combine=NULL)
dist_mtx <- 1/sim_mtx
col_clust <- hclust(as.dist(dist_mtx))
ddgram_col <- as.dendrogram(col_clust)
ggtree_plot_col <- ggtree(ddgram_col)
sc <- scale_colour_gradient2(
low = "white",
mid = "white",
high = scales::muted("red")
)
data_to_plot$ID <- factor(data_to_plot$ID, levels=col_clust$labels[col_clust$order])
dotplot <- ggplot(data_to_plot, aes(x=Cell.type, y = DescriptionTrunc, color = log10p, size = GeneRatioDecimal)) +
geom_point() +
geom_point(aes(size=GeneRatioDecimal), shape = 21, colour="black", stroke=0.5) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.line = element_blank()) +
ylab('') +
theme(axis.ticks = element_blank()) +
scale_y_discrete(position = "right") +
sc
ggtree_plot_col <- ggtree_plot_col + aplot::ylim2(dotplot)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
P_F <- ggtree_plot_col +
dotplot +
plot_layout(ncol = 2, widths = c(0.7, 4))
P_F

S3
markers_df <- read_tsv("data/Figure_5/marker_genes.txt")
## Rows: 46 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gene, gene.corresp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
color_mapping <- colorRampPalette(brewer.pal(9, "Set1"))(21)[1:18]
names(color_mapping) <- levels(seurat_obj$cell.type)
markers_df$Color <- color_mapping[match(markers_df$gene.corresp, names(color_mapping))]
markers_df <- markers_df %>%
replace_na(list(Color = "#b1b1b1")) %>%
mutate(y.label = paste("<span style = 'color:",
Color,
";'>",
gene,
"</span>", sep = ""))
data.to.plot <- dot_plot(object = seurat_obj[,seurat_obj$genotype=="HE"],
assay = "RNA",
features = markers_df$gene,
cluster.idents = TRUE,
group.by = "cell.type") %>%
dplyr::rename(`% Expressing` = pct.exp, `Cell type` = id) %>%
left_join(markers_df, by = c("features.plot" = "gene"))
## Warning: The following requested variables were not found: Csfr1, Cd51
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
data.to.plot$features.plot <- factor(data.to.plot$features.plot, levels = unique(markers_df$gene))
data.to.plot <- data.to.plot %>%
filter(`% Expressing` > 1)
data.to.plot$y.label <- factor(data.to.plot$y.label, levels = markers_df$y.label)
data.to.plot$`Cell type` <- factor(data.to.plot$`Cell type`, levels = (levels(seurat_obj$cell.type)))
dotplot <- data.to.plot %>%
ggplot(aes(y=features.plot, x = `Cell type`, color = avg.exp.scaled, size = `% Expressing`)) +
geom_point() +
geom_point(aes(size=`% Expressing`), shape = 21, colour="black", stroke=0.5) +
theme_cowplot(13) +
scale_radius(limits = c(0, 100)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_markdown(),
axis.line = element_blank()) +
ylab(NULL) +
xlab(NULL) +
theme(axis.ticks = element_blank()) +
scale_x_discrete(position = "top") +
viridis::scale_colour_viridis(option="magma") +
guides(size=guide_legend(override.aes=list(shape=21, colour="black", fill="white")))
labels <- ggplot(data.to.plot %>%
distinct(`Cell type`),
aes(y = factor(1), x = `Cell type`, fill = `Cell type`)) +
geom_tile() +
scale_fill_manual(values = color_mapping) +
theme_nothing() +
xlim2(dotplot) +
xlab(NULL)
P <- labels +
dotplot +
plot_layout(nrow = 2, heights = c(.2, 9))
P

S4A
# This ISG list is from Interferome DB
rat_isgs <- read.table("data/Figure_5/rat_isgs.tsv", header = TRUE, sep = "\t")
# DE analysis genes
de.results <- read.table("data/Figure_5/DE_results.tsv", header = TRUE, sep = "\t")
# Preselecting cell types
selected_cell_types <- de.results %>% filter(FDR < 0.05) %>% group_by(Cell.type) %>% summarise(Count = n()) %>% pull(Cell.type)
# Enrichment of ISGs in downregulated genes
enrichment_df = data.frame()
for (Cell_type in selected_cell_types) {
de.results_sub <- de.results %>%
dplyr::filter(Cell.type == Cell_type) %>%
mutate(DE = ifelse(FDR < 0.05 & logFC < 1, 1, 0)) %>%
dplyr::select(Gene.name, DE)
rat_de_isg <- de.results_sub %>%
left_join(rat_isgs, by=c("Gene.name"="Rat_symbol")) %>%
replace_na(list(ISG = 0))
contingency.table <- table(rat_de_isg$ISG, rat_de_isg$DE)
if (ncol(contingency.table) < 2) {
enrichment_df <- rbind(
enrichment_df,
data.frame(Cell.type = Cell_type,
pval = NA,
OR = NA,
CI.low = NA,
CI.hi = NA))
next
}
test <- fisher.test(contingency.table)
enrichment_df <- rbind(
enrichment_df,
data.frame(Cell.type = Cell_type,
pval = test$p.value,
OR = test$estimate,
CI.low = test$conf.int[1],
CI.hi = test$conf.int[2])
)
}
enrichment_df$p.adj <- p.adjust(enrichment_df$pval)
enrichment_df <- enrichment_df %>%
mutate(Col = if_else(p.adj < .05, "p<0.05", "p>0.05"))
col_vec <- as.vector(met.brewer("Nizami", 5)[4])
enrichment_df <- enrichment_df %>%
mutate(hjust = ifelse(CI.hi==Inf, 1.2, 1.2),
hjust = ifelse(is.na(CI.hi), NA, hjust))
enrichment_df$Cell.type <- factor(enrichment_df$Cell.type, levels = enrichment_df$Cell.type[order(enrichment_df$OR)])
P_S4A <- ggplot(enrichment_df, aes(x = Cell.type, y = OR, ymin = CI.low, ymax = CI.hi, color=Col)) +
geom_linerange() +
geom_pointrange() +
geom_text(aes(label = formatC(p.adj, format = "e")), hjust=enrichment_df$hjust, vjust=-.4) +
scale_y_log10() +
coord_flip() +
#geom_hline(yintercept = 1, linetype = "dashed") +
theme_cowplot(16) +
#ylim(0, 700) +
scale_color_manual(values = c(as.vector(met.brewer("Nizami", 5)[4]),
"grey")) +
ylab("OR") +
xlab(NULL) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
P_S4A

S3
gntp_col <- met.brewer("Nizami", 2)[c(2, 1)]
names(gntp_col) <- c("HE", "KO")
P_S3 <- seurat_obj@meta.data %>%
group_by(genotype, seq_folder, cell.type) %>%
summarise(Count = n()) %>%
mutate(Freq = Count/sum(Count)) %>%
ggplot(aes(cell.type, Freq, fill = genotype)) +
stat_summary(geom = "bar", fun = mean, position = "dodge") +
stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
geom_point(position = position_dodge(0.8), alpha=0.3, size = 0.5) +
scale_fill_manual(values = gntp_col) +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = NULL, y = "Frequency")
## `summarise()` has grouped output by 'genotype', 'seq_folder'. You can override
## using the `.groups` argument.
P_S3

S4B
p1 <- plot_scatter("Activated NK cell")
## As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
## This message is displayed once per session.
p2 <- plot_scatter("Macrophage (M1-like)")
p3 <- plot_scatter("Neutrophil")
p4 <- plot_scatter("Naive CD4+ T cell")
p5 <- plot_scatter("pDC")
p6 <- plot_scatter("NK cell")
p7 <- plot_scatter("FO B cell")
p8 <- plot_scatter("CD8+ Tcm cell")
p9 <- plot_scatter("Memory B cell 2")
P_S4B <- p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + plot_layout(ncol = 3)
P_S4B
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning in transformation$transform(coord_limits): NaNs produced
## Warning in transformation$transform(coord_limits): NaNs produced
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Indiana/Indianapolis
## tzcode source: internal
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggtext_0.1.2 aplot_0.2.5 viridis_0.6.5
## [4] viridisLite_0.4.2 ggdendro_0.2.0 ggtree_3.14.0
## [7] GOSemSim_2.32.0 org.Rn.eg.db_3.20.0 AnnotationDbi_1.68.0
## [10] IRanges_2.40.1 S4Vectors_0.44.0 Biobase_2.66.0
## [13] BiocGenerics_0.52.0 enrichplot_1.26.6 clusterProfiler_4.14.6
## [16] msigdbr_10.0.2 circlize_0.4.16 ComplexHeatmap_2.22.0
## [19] ggpubr_0.6.0 ggrepel_0.9.6 scales_1.3.0
## [22] DescTools_0.99.60 patchwork_1.3.0 cowplot_1.1.3
## [25] MetBrewer_0.2.0 lubridate_1.9.4 forcats_1.0.0
## [28] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4
## [31] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [34] ggplot2_3.5.2 tidyverse_2.0.0 RColorBrewer_1.1-3
## [37] Seurat_5.2.1 SeuratObject_5.0.2 sp_2.2-0
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.6 matrixStats_1.5.0 spatstat.sparse_3.1-0
## [4] httr_1.4.7 doParallel_1.0.17 tools_4.4.3
## [7] sctransform_0.4.1 backports_1.5.0 R6_2.6.1
## [10] lazyeval_0.2.2 uwot_0.2.3 GetoptLong_1.0.5
## [13] litedown_0.7 withr_3.0.2 gridExtra_2.3
## [16] progressr_0.15.1 cli_3.6.4 Cairo_1.6-2
## [19] spatstat.explore_3.4-2 fastDummies_1.7.5 labeling_0.4.3
## [22] sass_0.4.10 mvtnorm_1.3-3 spatstat.data_3.1-6
## [25] proxy_0.4-27 ggridges_0.5.6 pbapply_1.7-2
## [28] commonmark_1.9.5 yulab.utils_0.2.0 gson_0.1.0
## [31] DOSE_4.0.1 R.utils_2.13.0 parallelly_1.43.0
## [34] readxl_1.4.5 rstudioapi_0.17.1 RSQLite_2.3.9
## [37] gridGraphics_0.5-1 generics_0.1.3 shape_1.4.6.1
## [40] vroom_1.6.5 ica_1.0-3 spatstat.random_3.3-3
## [43] car_3.1-3 GO.db_3.20.0 Matrix_1.7-3
## [46] abind_1.4-8 R.methodsS3_1.8.2 lifecycle_1.0.4
## [49] yaml_2.3.10 carData_3.0-5 qvalue_2.38.0
## [52] Rtsne_0.17 blob_1.2.4 promises_1.3.2
## [55] crayon_1.5.3 ggtangle_0.0.6 miniUI_0.1.2
## [58] lattice_0.22-7 haven_2.5.4 KEGGREST_1.46.0
## [61] pillar_1.10.2 knitr_1.50 fgsea_1.32.4
## [64] rjson_0.2.23 boot_1.3-31 gld_2.6.7
## [67] future.apply_1.11.3 codetools_0.2-20 fastmatch_1.1-6
## [70] glue_1.8.0 ggfun_0.1.8 spatstat.univar_3.1-2
## [73] data.table_1.17.0 treeio_1.30.0 vctrs_0.6.5
## [76] png_0.1-8 spam_2.11-1 cellranger_1.1.0
## [79] gtable_0.3.6 assertthat_0.2.1 cachem_1.1.0
## [82] xfun_0.52 mime_0.13 survival_3.8-3
## [85] iterators_1.0.14 fitdistrplus_1.2-2 ROCR_1.0-11
## [88] nlme_3.1-168 bit64_4.6.0-1 RcppAnnoy_0.0.22
## [91] GenomeInfoDb_1.42.3 bslib_0.9.0 irlba_2.3.5.1
## [94] KernSmooth_2.23-26 colorspace_2.1-1 DBI_1.2.3
## [97] Exact_3.3 tidyselect_1.2.1 bit_4.6.0
## [100] compiler_4.4.3 expm_1.0-0 xml2_1.3.8
## [103] plotly_4.10.4 msigdbdf_24.1.0 lmtest_0.9-40
## [106] digest_0.6.37 goftest_1.2-3 spatstat.utils_3.1-3
## [109] rmarkdown_2.29 XVector_0.46.0 htmltools_0.5.8.1
## [112] pkgconfig_2.0.3 fastmap_1.2.0 rlang_1.1.6
## [115] GlobalOptions_0.1.2 htmlwidgets_1.6.4 UCSC.utils_1.2.0
## [118] shiny_1.10.0 farver_2.1.2 jquerylib_0.1.4
## [121] zoo_1.8-14 jsonlite_2.0.0 BiocParallel_1.40.2
## [124] R.oo_1.27.0 magrittr_2.0.3 ggplotify_0.1.2
## [127] Formula_1.2-5 GenomeInfoDbData_1.2.13 dotCall64_1.2
## [130] munsell_0.5.1 Rcpp_1.0.14 ape_5.8-1
## [133] babelgene_22.9 reticulate_1.42.0 stringi_1.8.7
## [136] rootSolve_1.8.2.4 zlibbioc_1.52.0 MASS_7.3-65
## [139] plyr_1.8.9 parallel_4.4.3 listenv_0.9.1
## [142] lmom_3.2 deldir_2.0-4 Biostrings_2.74.1
## [145] splines_4.4.3 gridtext_0.1.5 tensor_1.5
## [148] hms_1.1.3 igraph_2.1.4 spatstat.geom_3.3-6
## [151] markdown_2.0 ggsignif_0.6.4 RcppHNSW_0.6.0
## [154] reshape2_1.4.4 evaluate_1.0.3 tzdb_0.5.0
## [157] foreach_1.5.2 httpuv_1.6.16 RANN_2.6.2
## [160] polyclip_1.10-7 future_1.40.0 clue_0.3-66
## [163] scattermore_1.2 broom_1.0.8 xtable_1.8-4
## [166] tidytree_0.4.6 e1071_1.7-16 RSpectra_0.16-2
## [169] rstatix_0.7.2 later_1.4.2 class_7.3-23
## [172] memoise_2.0.1 cluster_2.1.8.1 timechange_0.3.0
## [175] globals_0.17.0